###Install Packages where necessary
#install.packages(c("tidyverse","haven","sandwich","lmtest","lme4","ggpubr","texreg"))


## Load required libraries
library(tidyverse)
library(haven)
library(sandwich)
library(lmtest)
library(lme4)
library(ggpubr)
library(texreg)


### Loading data and Recoding Variables
data <- read.csv("C:/Users/tony_/Downloads/data_replication.csv")
data$lunch <- data2$lunch
data$judge <- as.factor(data$judge)
data$attorney <- !is.na(data$attorney)
data$time_break <- data$time_break/3600
data$time_breaks <- ifelse(is.na(data$time_break_hearing),data$time_break,data$time_break_hearing)
data$time_breaks <- data$time_breaks/3600
data$trial <- !is.na(data$timeday_hearing)
data$time_break_hearing <- data$time_break_hearing/3600

data$type.y <- relevel(as.factor(data$type.y),ref="speeding")

### Regression Models ###
model1 <- lm(dismissed~time_break+type.y+attorney+ncharges+plea+judge,data=data)
summary(model1)

robustsummary <- function(model) { 

  SE_robust <- sqrt(diag(vcovCL(model, cluster = data$judge)))
  summ <- summary(model)
  summ$coefficients[,2] <- SE_robust
  summ$coefficients[,3] <- summ$coefficients[,1]/SE_robust # get t-stats
  summ$coefficients[,4] <- 2*(1-pt(abs(summ$coefficients[,3]),summ$df[2]))
  summ
}
robustsummary2 <- function(model) { 
  
  SE_robust <- sqrt(diag(vcovCL(model, cluster = data_cases$judge)))
  summ <- summary(model)
  summ$coefficients[,2] <- SE_robust
  summ$coefficients[,3] <- summ$coefficients[,1]/SE_robust # get t-stats
  summ$coefficients[,4] <- 2*(1-pt(abs(summ$coefficients[,3]),summ$df[2]))
  summ
}

model2 <- lm(dismissed~time_break_hearing+type.y+attorney+ncharges+plea+judge,data=data)
summary(model2)

model3 <- lm(dismissed~time_breaks+type.y+attorney+ncharges+plea+judge,data=data)
summary(model3)

data_cases <- data %>% group_by(CASE_ID) %>% mutate(wfine=sum(wfine1,na.rm=T)) %>% slice_head()

model4 <- lm(fine~time_breaks+type.y+judge+attorney+ncharges+plea+judge,data=data_cases)
summary(model4)
model5 <- lm(dratio~time_breaks+type.y+judge+attorney+ncharges+plea+judge,data=data_cases)
summary(model5)
model1 <- robustsummary(model1)
model2 <- robustsummary(model2)
model3 <- robustsummary(model3)
model4 <- robustsummary2(model4)
model5 <- robustsummary2(model5)

m1 <- texreg::extract(model1)
m2 <- texreg::extract(model2)
m3 <- texreg::extract(model3)
m4 <- texreg::extract(model4)
m5 <- texreg::extract(model5)
texreg::htmlreg(c(m1,m2,m3,m4,m5),file="main_tables.doc",digits=2)

###Logit
model1l <- glm(dismissed~time_break+type.y+attorney+ncharges+plea+judge,family=binomial, data=data)
summary(model1l)

model2l <- glm(dismissed~time_break_hearing+type.y+attorney+ncharges+plea,family=binomial,data=data)
summary(model2l)

model3l <- glm(dismissed~time_breaks+type.y+attorney+ncharges+plea,family=binomial,data=data)
summary(model3l)

m1 <- texreg::extract(model1l)
m2 <- texreg::extract(model2l)
m3 <- texreg::extract(model3l)
texreg::htmlreg(c(m1,m2,m3),file="glm_tables.doc",digits=2)

###Less and More Controls
model1aa <- lm(dismissed~time_breaks+judge+attorney+plea,data=data)
summary(model1aa)
model1ca <- lm(dismissed~time_break+judge+attorney,data=data)
summary(model1ca)
model1cb <- lm(dismissed~time_breaks+type.y+attorney+ncharges+plea+judge+lunch,data=data)
summary(model1cb)
model1cc <- lm(dismissed~time_breaks+type.y+attorney+ncharges+plea+judge+lunch+age+fta+wfine1,data=data)
summary(model1cc)


m1 <- texreg::extract(model1aa)
m2 <- texreg::extract(model1ca)
m3 <- texreg::extract(model1cb)
m4 <- texreg::extract(model1cc)


texreg::htmlreg(c(m1,m2,m3,m4),file="robust_tables.doc",digits=2)



### Figure 1: OLS Graphs#### 
###Coefficient Graph
library(dotwhisker)
m0_df <- broom::tidy(model1) %>% filter(term != "(Intercept)") %>% mutate(model = "Arraignments") %>% 
  filter(!grepl('type*', term)) %>% 
  filter(!grepl('judge*', term))

m0_df <- m0_df[c(1:4),]

m0_df$term = c("Hours since Break","Attorney", "Number of Charges", "Not Guilty Plea")


m1_df <- broom::tidy(model2) %>% filter(term != "(Intercept)") %>% mutate(model = "Trials") %>% 
  filter(!grepl('type*', term)) %>% 
  filter(!grepl('judge*', term))

m1_df <- m1_df[1:4,]

m1_df$term = c("Hours since Break","Attorney", "Number of Charges","Not Guilty Plea")

m2_df <- broom::tidy(model3) %>% filter(term != "(Intercept)") %>% mutate(model = "All Hearings") %>% 
  filter(!grepl('type*', term)) %>% 
  filter(!grepl('judge*', term))

m2_df <- m2_df[1:3,]

m2_df$term = c("Hours since Break","Attorney", "Number of Charges")
m1 <- rbind(m0_df,m1_df,m2_df)

p1 <- dotwhisker::dwplot(m1,dot_args = list(size=2) ) +geom_vline(xintercept = 0, colour = "grey60", linetype = 2)  + xlab("Change in Probability of Dismissal") + ylab("") +
  ggtitle("Model 1: Predicting Case Outcomes (OLS)") + theme_minimal()+
  theme(
    plot.title = element_text(face = "bold"), axis.text.x = element_text(face="bold"), axis.text.y = element_text(face="bold"))


#789 x 461

m1_df <- broom::tidy(model4) %>% filter(term != "(Intercept)") %>% mutate(model = "Fines (Case-level)") %>% 
  filter(!grepl('judge*', term)) %>% 
  filter(!grepl('type*', term))

m1_df <- m1_df[1:4,]
m1_df$term = c("Hours since Break","Attorney", "Number of Charges","Not Guilty Plea")


m2_df <- broom::tidy(model5) %>% filter(term != "(Intercept)") %>% mutate(model = "Fines (Case-level)") %>% 
  filter(!grepl('judge*', term)) %>% 
  filter(!grepl('type*', term))

m2_df <- m2_df[1:4,]
m2_df$term = c("Hours since Break","Attorney", "Number of Charges","Not Guilty Plea")

m2 <- rbind(m1_df,m2_df)

p2 <- dotwhisker::dwplot(m1_df,dot_args = list(size=2,color="green"),whisker_args = list(color="green")) +geom_vline(xintercept = 1, colour = "grey60", linetype = 2)  + xlab("Change in Fines ($)") + ylab("") +
  ggtitle("Model 2: Predicting Fines (OLS)") + theme_minimal()+
  theme(
    plot.title = element_text(face = "bold"), axis.text.x = element_text(face="bold"), axis.text.y = element_text(face="bold"))
p2

m2_df$model <- "Dismissal Ratio"
p3 <- dotwhisker::dwplot(m2_df,dot_args = list(size=2,color="orange"), whisker_args = list(color="orange")) +geom_vline(xintercept = 0, colour = "grey60", linetype = 2)  + xlab("Change in Dismissal Ratio") + ylab("") +
  ggtitle("Model 3: Predicting Dismissal Ratio (OLS)") + theme_minimal()+
  theme(
    plot.title = element_text(face = "bold"), axis.text.x = element_text(face="bold"), axis.text.y = element_text(face="bold"))

## Saving Figure
tiff("figure1.tiff", units="in", width=8, height=6, res=300)
gridExtra::grid.arrange(p1,p2,p3,ncol=1)
dev.off()

table(data$type.y)


###Figure 2: Marginal Effects Plot###


#newdata2 <- data.frame(time_break_hearing= seq(0.1,4.0,0.1), type.y= rep("speeding",40), judge=rep("11315936",40), attorney=rep(TRUE,40),ncharges=rep(median(data$ncharges),40), plea=rep(TRUE,40),trial=rep(FALSE,40),lunch=rep(FALSE,40))
#newdata2 <- cbind(newdata2,dismissal=predict(model2, newdata2, type = "response",se.fit = T))
#newdata2 <- newdata2 %>% rename(time_break=time_break_hearing)
#newdata <- rbind(newdata,newdata2)



model1 <- glm(dismissed~time_break+type.y+attorney+judge+ncharges+plea,family=gaussian,data=data)
summary(model1)
newdata<- data.frame( time_break= seq(0.1,4.0,0.1), type.y= rep("moving",40), judge=rep("11315936",40), attorney=rep(TRUE,40),ncharges=rep(median(data$ncharges),40), plea=rep(FALSE,40),trial=rep(TRUE,40),lunch=rep(TRUE,40))
newdata <- cbind(newdata,dismissal=predict(model1, newdata, type = "response",se.fit = T))


newdata$ub <- newdata$dismissal.fit + (newdata$dismissal.se.fit*1.96)
newdata$lb <- newdata$dismissal.fit - (newdata$dismissal.se.fit*1.96)

newdata$trial <- ifelse(newdata$trial==1,"trials","arraignments")

##Marginal Effect Plot
tiff("figure2.tiff", units="in", width=8, height=4, res=300)
ggplot(newdata, aes(time_break)) + 
  geom_line(aes(y=dismissal.fit),size=2, colour="#77C3EC") + 
  geom_ribbon(aes(ymin=lb, ymax=ub, fill=trial), alpha=0.2) + 
  ylab("Probability of Dismissal") +
  xlab("Hours since last break") +
  theme_bw() +
  theme(axis.text.y = element_text(hjust=0, size=rel(1.1)),
        panel.grid.major.y = element_line(size=0.25, colour="grey80", linetype="dotted"),
        panel.grid.major.x = element_line(size=0.25, colour="grey80", linetype="dotted"),  
        panel.grid.minor = element_blank(),
        axis.ticks = element_blank()) + theme(legend.position = "none")  
  dev.off()



### Balancing Tests###

data$window <- is.na(data$timeday)
table(data$window)
t.test(data[which(data$window==0),]$dismissed,data[which(data$window==1),]$dismissed)
table(data[which(data$window==1),]$dismissed)
t.test(data[which(data$window==0),]$fine,data[which(data$window==1),]$fine)


##Difference in Case types between window and fines.
group1<- table(data[which(data$window==0),]$type.y)
group2 <- table(data[which(data$window==1),]$type.y)

# Create a combined table
combined_table <- rbind(group1,group2)


total1 <- sum(group1)
total2 <- sum(group2)
hearing <- group1 / total1
window <- group2 / total2
proportions <- rbind(hearing, window)

# Print the proportions
print("Proportions:")
print(proportions)

# Perform Chi-Square test
chi_square_test <- chisq.test(combined_table)

# Print the Chi-Square test result
print(chi_square_test)


# Create contingency table
contingency_table <- table(group1, group2)

# Print the contingency table
print(contingency_table)

# Perform Chi-Square test
chi_square_test <- chisq.test(contingency_table)

# Print the Chi-Square test result
print(chi_square_test)

write.csv(datat,"ark-charge-data.csv")

###Figure A.1 : Varying Lunch Break Time ###
s <- data %>% select(plea_datetime_last,timeday)
datat <- data
coef1 <- 1:10
coef1_se <- 1:10
j<-1

##  Changing lunch time from 11:30 to 1:30 and storing model results
for(i in seq(10800,18000,300)){
  datat$time_break <- ifelse(data$timeday>i,data$timeday-i,data$timeday)
  datat$time_break <- ifelse(is.na(datat$time_break_hearing),datat$time_break,NA)
  datat$time_break <- datat$time_break/3600
  model_temp <- glm(dismissed~time_break+type.y+judge+attorney+ncharges,family=binomial(link="logit"),data=datat)
  coef1[j] <- model_temp$coefficients[2]
  coef1_se[j] <- coef(summary(model_temp))[2, "Std. Error"]
  j <- j+1
}

xt= c("11:30","11:35","11:40","11:45","11:50","11:55","12:00","12:05","12:10","12:15","12:20","12:25","12:30","12:35","12:40","12:45","12:50","12:55","13:00","13:05","13:10","13:15","13:20","13:25","13:30")
x <- seq(1,25)
upper = coef1 + (1.96*coef1_se)
lower = coef1 - (1.96*coef1_se)

tabs <- cbind(x,coef1,upper,lower)
tabs <- as.data.frame(tabs)
tabs$coef1 <- round(as.numeric(coef1),2)
tiff("figureA1.tiff", units="in", width=8, height=4, res=300)

ggplot(tabs,aes(x, coef1)) +        # ggplot2 plot with confidence intervals
  geom_point() +
    geom_errorbar(aes(ymin = lower, ymax = upper)) +geom_hline(yintercept=0,linetype="dotted", 
                                                               color = "red", size=0.5)+ scale_x_continuous(breaks=seq(1,25),labels = xt) + xlab("Time of Lunch") + ylab("Coefficient for Time") + theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

dev.off()
#### Figure A2 : Average Dismissal Rates before and After Lunch ####
## Load the Data
data <- read.csv("data_replication.csv")

##Recoding Variables for Figure
data$timeday <-  as.numeric(hm(data$pleatime))
data$starttime <- as.numeric(hm("07:30"))
data$lunchtime <- as.numeric(hm("12:00"))
data$timeday_hearing <- as.numeric(hm(data$hearingtime))
data$timeday <-ifelse(data$timeday<0,NA,data$timeday)
data$time_break <- ifelse(data$timeday>data$lunchtime,data$timeday-data$lunchtime,data$timeday-data$starttime)
data$timeday_hearing <-ifelse(data$timeday_hearing<0,NA,data$timeday_hearing)
data$time_break_hearing <- ifelse(data$timeday_hearing>data$lunchtime,data$timeday_hearing-data$lunchtime,data$timeday_hearing-data$starttime)
data$time <- ifelse(is.na(data$timeday_hearing),data$timeday,data$timeday_hearing)
data$time_breaks <- ifelse(is.na(data$time_break_hearing),data$time_break,data$time_break_hearing)
data$lunch <- data$time>data$lunchtime
data$trial <- !is.na(data$timeday_hearing)
data$judge <- as.factor(data$judge)
data$attorney <- !is.na(data$attorney)
data <- data %>% group_by(CASE_ID) %>% mutate(ncharges=n()) %>% ungroup()
datat <- data %>% select(caseid,CHARGE_NO,dismissed,fine,time_break,time_break_hearing,type.y,judge,attorney,plea,time,lunch,trial,ncharges,wfine1)

data <- data %>% mutate( timeofday= case_when(time>=27000+(3600*0) &time <27000+(3600*1) ~ "08:00",
                                              time>=27000+(3600*1)  &time <27000+(3600*2) ~ "09:00",      
                                              time>=27000+(3600*2)  &time <27000+(3600*3) ~ "10:00",
                                              time>=27000+(3600*3)  &time <27000+(3600*4) ~ "11:00",
                                              time>=27000+(3600*4)  &time <27000+(3600*5) ~ "12:00",
                                              time>=27000+(3600*5)  &time <27000+(3600*6) ~ "13:00",
                                              time>=27000 +(3600*6) &time <27000+(3600*7) ~ "14:00",
                                              time>=27000+(3600*7)  &time <27000+(3600*8) ~ "15:00",
                                              time>=27000+(3600*8)  &time <27000+(3600*10) ~ "16:00",
                                              .default = NA_character_))
data$timeofday <- fct_relevel(data$timeofday,"08:00","09:00","10:00","11:00","12:00","13:00","14:00","15:00","16:00")



dismissal_rate <- data %>% 
  group_by(timeofday) %>%
  summarise(rate = mean(dismissed))
dismissal_rate$lunch <- 1
dismissal_rate[1:5,]$lunch <- 0
dismissal_rate$lunch <- as.factor(dismissal_rate$lunch)

dismissal_rate <- dismissal_rate %>% na.omit()

tiff("figureA2.tiff", units="in", width=8, height=4, res=300)
ggpubr::ggdotplot(dismissal_rate, x = "timeofday", y = "rate",fill = "lunch", 
                  ylab = "Dismissal Rate", xlab = "Time of Day")
dev.off()
